View Javadoc

1   /*
2    * SmartCrawler
3    *
4    * $Id: ConfigReader.java,v 1.12 2005/08/05 15:55:52 vincool Exp $
5    * Copyright 2005 Davide Pozza
6    *
7    * This program is free software; you can redistribute it
8    * and/or modify it under the terms of the GNU General Public
9    * License as published by the Free Software Foundation;
10   * either version 2 of the License, or (at your option) any
11   * later version.
12   *
13   * This program is distributed in the hope that it will be
14   * useful, but WITHOUT ANY WARRANTY; without even the implied
15   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16   * PURPOSE. See the GNU General Public License for more
17   * details.
18   *
19   * You should have received a copy of the GNU General Public
20   * License along with this program; if not, write to the Free
21   * Software Foundation, Inc., 59 Temple Place, Suite 330,
22   * Boston, MA 02111-1307 USA
23   *
24   */
25  package org.smartcrawler.common;
26  
27  import org.apache.commons.configuration.Configuration;
28  import org.apache.commons.configuration.ConfigurationException;
29  import org.apache.commons.configuration.XMLConfiguration;
30  
31  import java.io.File;
32  
33  import java.util.Collection;
34  import java.util.Hashtable;
35  import java.util.TreeMap;
36  import org.smartcrawler.persistence.Persister;
37  import org.smartcrawler.retriever.Retriever;
38  
39  /***
40   *
41   *
42   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
43   * @version <tt>$Revision: 1.12 $</tt>
44   */
45  public class ConfigReader {
46      /*** Creates a new instance of ConfigReader */
47      public ConfigReader() {
48      }
49  
50      /***
51       *
52       * @param uri
53       * @throws org.apache.commons.configuration.ConfigurationException
54       * @return
55       */
56      public Context readConfig(String uri)
57      throws ConfigurationException {
58          if (uri == null) {
59              throw new IllegalArgumentException("URI passed is null");
60          }
61  
62          return loadConfig(uri);
63      }
64  
65      /***
66       *
67       * @param configFile
68       * @throws org.apache.commons.configuration.ConfigurationException
69       * @return
70       */
71      protected Context loadConfig(String configFile)
72      throws ConfigurationException {
73          Context c = new Context();
74          Configuration conf = new XMLConfiguration(configFile);
75  
76          /*
77          item = conf.getString("retriever.class");
78          try {
79              c.setRetrieverClass(Class.forName(item));
80          } catch(Exception e){
81              c.setRetrieverClass(DefaultRetriever.class);
82          }
83  
84          item = conf.getString("persister.class");
85          try {
86              c.setRetrieverClass(Class.forName(item));
87          } catch(Exception e){
88              c.setRetrieverClass(FileSystemPersister.class);
89          }
90           */
91          c.setLoggers(buildLoggers(conf));
92  
93          //The initialization of the loggers must be performed asap
94          //SCLogger.initialize(c.getLoggers());
95          c.setPrecFiltersList(buildPrecFiltersList(conf));
96  
97          c.setPostFiltersList(buildPostFiltersList(conf));
98  
99          c.setEngineThreadNumber(buildEngineThreadNumber(conf));
100 
101         c.setPersister(buildPersister(conf));
102         c.setRetriever(buildRetriever(conf));
103         return c;
104     }
105 
106     /***
107      *
108      * @param conf
109      * @return
110      */
111     protected Collection buildPrecFiltersList(Configuration conf) {
112         TreeMap map = new TreeMap();
113 
114         try {
115             int n = conf.getList("retriever.filters.filter.class").size();
116             for (int i = 0; i < n; i++) {
117                 String key = conf.getString("retriever.filters.filter(" + i
118                         + ").priority");
119                 String className = conf.getString("retriever.filters.filter("
120                         + i + ").class");
121                 String filterName = conf.getString("retriever.filters.filter("
122                         + i + ").name");
123 
124                 Object value = Class.forName(className).newInstance();
125 
126                 int np = conf.getList(
127                         "retriever.filters.filter("
128                         + i + ").filter-param.param-name")
129                         .size();
130 
131                 if (np > 0) {
132                     Hashtable params = new Hashtable();
133 
134                     for (int ip = 0; ip < np; ip++) {
135                         String parName = conf.getString(
136                                 "retriever.filters.filter("
137                                 + i + ").filter-param(" + ip
138                                 + ").param-name");
139                         String parValue = conf.getString(
140                                 "retriever.filters.filter("
141                                 + i + ").filter-param(" + ip
142                                 + ").param-value");
143 
144                         params.put(parName, parValue);
145                     }//for (int ip = 0; ip < np; ip++)
146 
147                     if (value instanceof AbstractParametrizableComponent) {
148                         ((AbstractParametrizableComponent) value).setParameters(params);
149                     }
150                 }
151 
152                 map.put(key, value);
153             }
154 
155             return map.values();
156         } catch (Exception e) {
157             e.printStackTrace();
158             return null; //ok ?
159         }
160     }
161 
162     /***
163      *
164      * @param conf
165      * @return
166      */
167     protected Collection buildPostFiltersList(Configuration conf) {
168         TreeMap map = new TreeMap();
169 
170         try {
171             int n = conf.getList("persister.filters.filter.class").size();
172 
173             for (int i = 0; i < n; i++) {
174                 String key = conf.getString("persister.filters.filter(" + i
175                         + ").priority");
176                 String className = conf.getString("persister.filters.filter("
177                         + i + ").class");
178                 Object value = Class.forName(className).newInstance();
179 
180                 int np = conf.getList(
181                         "persister.filters.filter("
182                         + i + ").filter-param.param-name")
183                         .size();
184 
185                 if (np > 0) {
186                     Hashtable params = new Hashtable();
187 
188                     for (int ip = 0; ip < np; ip++) {
189                         params.put(conf.getString(
190                                 "persister.filters.filter("
191                                 + i + ").filter-param(" + ip
192                                 + ").param-name"),
193                                 conf.getString(
194                                 "persister.filters.filter("
195                                 + i + ").filter-param(" + ip
196                                 + ").param-value"));
197                     }
198 
199                     if (value instanceof AbstractParametrizableComponent) {
200                         ((AbstractParametrizableComponent) value).setParameters(params);
201                     }
202                 }
203 
204                 map.put(key, value);
205             }
206 
207             return map.values();
208         } catch (Exception e) {
209             e.printStackTrace();
210             return null; //ok ?
211         }
212     }
213 
214     /***
215      *
216      * @param conf
217      * @return
218      */
219     protected int buildEngineThreadNumber(Configuration conf) {
220         return conf.getInteger("engine.threadsNumber", new Integer("5"))
221         .intValue();
222     }
223 
224     /***
225      *
226      * @param conf
227      * @return
228      */
229     protected boolean buildMultiThread(Configuration conf) {
230         return conf.getBoolean("engine.isMultiThread", true);
231     }
232 
233     /***
234      *
235      * @param conf
236      * @return
237      */
238     protected Hashtable buildLoggers(Configuration conf) {
239         Hashtable loggers = new Hashtable();
240 
241         loggers.put("TRACER", "no");
242         loggers.put("ACCESS", "no");
243         loggers.put("LINK", "no");
244         loggers.put("PERMISSIONS", "no");
245         loggers.put("EXTRACTOR", "no");
246         loggers.put("CONSOLE", "no");
247         loggers.put("PERSISTER", "no");
248         loggers.put("PROVIDER", "no");
249 
250         try {
251             int n = conf.getList("loggers.logger[@active]").size();
252 
253             for (int i = 0; i < n; i++) {
254                 String active = conf.getString("loggers.logger(" + i
255                         + ")[@active]");
256                 String str = conf.getString("loggers.logger(" + i + ")[@type]");
257 
258                 loggers.put(str, active);
259             }
260         } catch (Exception e) {
261         }
262 
263         return loggers;
264     }
265 
266     /***
267      *
268      * @param conf
269      * @return
270      */
271 
272     protected Persister buildPersister(Configuration conf) {
273         try {
274 
275             String className = conf.getString("persister.class");
276             Persister persister = (Persister)Class.forName(className).newInstance();
277 
278             int np = conf.getList(
279                     "persister.persister-params.persister-param.param-name").size();
280             if (np > 0) {
281                 Hashtable params = new Hashtable();
282 
283                 for (int ip = 0; ip < np; ip++) {
284                     params.put(
285                             conf.getString(
286                             "persister.persister-params.persister-param("+ip+").param-name"),
287                             conf.getString(
288                             "persister.persister-params.persister-param("+ip+").param-value"));
289                 }
290 
291                 if (persister instanceof AbstractParametrizableComponent) {
292                     ((AbstractParametrizableComponent) persister).setParameters(params);
293                 }
294             }
295 
296             return persister;
297         } catch (Exception e) {
298             e.printStackTrace();
299             return null; //ok ?
300         }
301     }
302 
303     protected Retriever buildRetriever(Configuration conf) {
304         try {
305 
306             String className = conf.getString("retriever.class");
307             Retriever retriever = (Retriever)Class.forName(className).newInstance();
308 
309             int np = conf.getList(
310                     "retriever.retriever-params.retriever-param.param-name").size();
311 
312             if (np > 0) {
313                 Hashtable params = new Hashtable();
314 
315                 for (int ip = 0; ip < np; ip++) {
316                     params.put(
317                             conf.getString(
318                             "retriever.retriever-params.retriever-param("+ip+").param-name"),
319                             conf.getString(
320                             "retriever.retriever-params.retriever-param("+ip+").param-value"));
321                 }
322 
323                 if (retriever instanceof AbstractParametrizableComponent) {
324                     ((AbstractParametrizableComponent) retriever).setParameters(params);
325                 }
326             }
327 
328             return retriever;
329         } catch (Exception e) {
330             e.printStackTrace();
331             return null; //ok ?
332         }
333     }
334 
335 }